library(GenomicAlignments)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: GenomicRanges
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## Loading required package: BiocParallel
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
##
## aperm, apply, rowsum
## Loading required package: Biostrings
## Loading required package: XVector
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
## Loading required package: Rsamtools
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.3.2 ✔ purrr 0.3.3
## ✔ tibble 2.1.3 ✔ dplyr 1.0.3
## ✔ tidyr 1.0.2 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse() masks Biostrings::collapse(), IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ purrr::compact() masks XVector::compact()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks GenomicAlignments::first(), S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks GenomicAlignments::last()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ purrr::simplify() masks DelayedArray::simplify()
## ✖ dplyr::slice() masks XVector::slice(), IRanges::slice()
library(cqn)
## Loading required package: mclust
## Package 'mclust' version 5.4.5
## Type 'citation("mclust")' for citing this R package in publications.
##
## Attaching package: 'mclust'
## The following object is masked from 'package:purrr':
##
## map
## Loading required package: nor1mix
## Loading required package: preprocessCore
## Loading required package: splines
## Loading required package: quantreg
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
library(RColorBrewer)
library(edgeR)
## Loading required package: limma
##
## Attaching package: 'limma'
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
library(ggplot2)
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
library(ggridges)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:BiocGenerics':
##
## combine
colsBig <- clusterExperiment:::massivePalette
plotGCHex <- function(gr, counts){
counts2 <- counts
df <- as_tibble(cbind(counts2,gc=mcols(gr)$gc))
df <- gather(df, sample, value, -gc)
ggplot(data=df, aes(x=gc, y=log(value+1)) ) +
ylab("log(count + 1)") + xlab("GC-content") +
geom_hex(bins = 50) + theme_bw() #+ facet_wrap(~sample, nrow=2)
}
pal <- RColorBrewer::brewer.pal(n=8, "Dark2")
source("../../methods/gcqn_validated.R")
### this dataset combines samples from a number of different sources, therefore the GC effects are wildly different between samples.
data=read.delim("../../data/calderon2019_GSE118189/GSE118189_ATAC_counts.txt.gz")
colnames(data) <- substr(colnames(data),2,nchar(colnames(data)))
# get GC content
rn <- rownames(data)
sn <- unlist(lapply(lapply(strsplit(rn,split="_"),"[",1),function(x) gsub(pattern="chr",x=x,replacement="")))
start <- as.numeric(unlist(lapply(strsplit(rn,split="_"),"[",2)))
end <- as.numeric(unlist(lapply(strsplit(rn,split="_"),"[",3)))
gr <- GRanges(seqnames=sn, ranges=IRanges(start, end), strand="*", mcols=data.frame(peakID=rn))
ff <- FaFile("~/data/genomes/human/Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz")
peakSeqs <- getSeq(x=ff, gr)
gcContentPeaks <- letterFrequency(peakSeqs, "GC",as.prob=TRUE)[,1]
gcGroups <- Hmisc::cut2(gcContentPeaks, g=20)
mcols(gr)$gc <- gcContentPeaks
widthGroups <- Hmisc::cut2(width(gr), g=20)
# get metadata
cnames <- colnames(data)
donor <- unlist(lapply(strsplit(cnames,split=".",fixed=TRUE),"[",1))
celltype <- factor(unlist(lapply(strsplit(cnames,split=".",fixed=TRUE),"[",2)))
condition <- unlist(lapply(strsplit(cnames,split=".",fixed=TRUE),"[",3))
table(celltype,condition)
## condition
## celltype S U
## Bulk_B 3 4
## CD8pos_T 3 4
## Central_memory_CD8pos_T 4 4
## Effector_CD4pos_T 3 4
## Effector_memory_CD8pos_T 4 4
## Follicular_T_Helper 4 5
## Gamma_delta_T 3 4
## Immature_NK 0 5
## Mature_NK 6 4
## Mem_B 4 4
## Memory_NK 0 6
## Memory_Teffs 4 4
## Memory_Tregs 4 4
## Monocytes 6 3
## Myeloid_DCs 0 3
## Naive_B 3 4
## Naive_CD8_T 4 4
## Naive_Teffs 5 4
## Naive_Tregs 2 2
## pDCs 0 3
## Plasmablasts 0 3
## Regulatory_T 4 4
## Th1_precursors 4 4
## Th17_precursors 4 3
## Th2_precursors 4 4
# QC measures
qcMeasures <- openxlsx::read.xlsx(xlsxFile = "../../data/calderon2019_GSE118189/Supplementary_tables.xlsx",
sheet = 5)
qcMeasures <- qcMeasures[,1:4]
qcMeasures$sample <- gsub(x=qcMeasures$sample, pattern="-", replacement=".", fixed=TRUE)
rownames(qcMeasures) <- qcMeasures$sample
qcMeasures <- qcMeasures[colnames(data),]
# batch
addSamples <- read.table("../../data/calderon2019_GSE118189/samples_with_additional_resequencing.txt",
stringsAsFactors = FALSE)[,1]
addSamples <- gsub(x=addSamples, pattern="-", replacement=".", fixed=TRUE)
batch2 <- rep(0, ncol(data))
names(batch2) <- colnames(data)
batch2[addSamples]<- 1
batch2 <- factor(batch2)
batch <- droplevels(interaction(donor, batch2))
table(celltype,condition, batch)
## , , batch = 1001.0
##
## condition
## celltype S U
## Bulk_B 1 1
## CD8pos_T 1 1
## Central_memory_CD8pos_T 1 1
## Effector_CD4pos_T 1 1
## Effector_memory_CD8pos_T 1 1
## Follicular_T_Helper 1 1
## Gamma_delta_T 0 1
## Immature_NK 0 1
## Mature_NK 1 1
## Mem_B 1 1
## Memory_NK 0 1
## Memory_Teffs 1 1
## Memory_Tregs 1 1
## Monocytes 1 1
## Myeloid_DCs 0 1
## Naive_B 1 1
## Naive_CD8_T 1 1
## Naive_Teffs 1 1
## Naive_Tregs 0 0
## pDCs 0 1
## Plasmablasts 0 1
## Regulatory_T 0 0
## Th1_precursors 1 1
## Th17_precursors 1 1
## Th2_precursors 1 1
##
## , , batch = 1002.0
##
## condition
## celltype S U
## Bulk_B 1 1
## CD8pos_T 1 1
## Central_memory_CD8pos_T 1 1
## Effector_CD4pos_T 1 1
## Effector_memory_CD8pos_T 1 1
## Follicular_T_Helper 1 1
## Gamma_delta_T 1 1
## Immature_NK 0 1
## Mature_NK 1 0
## Mem_B 1 1
## Memory_NK 0 1
## Memory_Teffs 1 1
## Memory_Tregs 1 1
## Monocytes 1 0
## Myeloid_DCs 0 1
## Naive_B 1 1
## Naive_CD8_T 1 1
## Naive_Teffs 1 1
## Naive_Tregs 0 0
## pDCs 0 1
## Plasmablasts 0 1
## Regulatory_T 1 1
## Th1_precursors 1 1
## Th17_precursors 1 1
## Th2_precursors 1 1
##
## , , batch = 1003.0
##
## condition
## celltype S U
## Bulk_B 0 0
## CD8pos_T 0 0
## Central_memory_CD8pos_T 0 1
## Effector_CD4pos_T 0 0
## Effector_memory_CD8pos_T 0 0
## Follicular_T_Helper 0 0
## Gamma_delta_T 0 0
## Immature_NK 0 1
## Mature_NK 0 1
## Mem_B 0 0
## Memory_NK 0 0
## Memory_Teffs 0 0
## Memory_Tregs 0 0
## Monocytes 0 1
## Myeloid_DCs 0 0
## Naive_B 0 1
## Naive_CD8_T 1 0
## Naive_Teffs 0 0
## Naive_Tregs 0 0
## pDCs 0 0
## Plasmablasts 0 0
## Regulatory_T 0 0
## Th1_precursors 0 1
## Th17_precursors 0 0
## Th2_precursors 0 0
##
## , , batch = 1004.0
##
## condition
## celltype S U
## Bulk_B 0 1
## CD8pos_T 0 1
## Central_memory_CD8pos_T 1 1
## Effector_CD4pos_T 0 1
## Effector_memory_CD8pos_T 1 1
## Follicular_T_Helper 1 1
## Gamma_delta_T 1 1
## Immature_NK 0 1
## Mature_NK 1 1
## Mem_B 0 1
## Memory_NK 0 1
## Memory_Teffs 1 1
## Memory_Tregs 1 1
## Monocytes 1 1
## Myeloid_DCs 0 0
## Naive_B 0 1
## Naive_CD8_T 1 1
## Naive_Teffs 1 1
## Naive_Tregs 1 1
## pDCs 0 0
## Plasmablasts 0 0
## Regulatory_T 1 1
## Th1_precursors 1 1
## Th17_precursors 1 1
## Th2_precursors 1 1
##
## , , batch = 1008.0
##
## condition
## celltype S U
## Bulk_B 0 0
## CD8pos_T 0 0
## Central_memory_CD8pos_T 0 0
## Effector_CD4pos_T 0 0
## Effector_memory_CD8pos_T 0 0
## Follicular_T_Helper 0 0
## Gamma_delta_T 0 0
## Immature_NK 0 1
## Mature_NK 1 1
## Mem_B 0 0
## Memory_NK 0 1
## Memory_Teffs 0 0
## Memory_Tregs 0 0
## Monocytes 1 0
## Myeloid_DCs 0 1
## Naive_B 0 0
## Naive_CD8_T 0 0
## Naive_Teffs 0 0
## Naive_Tregs 0 1
## pDCs 0 1
## Plasmablasts 0 0
## Regulatory_T 0 0
## Th1_precursors 0 0
## Th17_precursors 0 0
## Th2_precursors 0 0
##
## , , batch = 1010.0
##
## condition
## celltype S U
## Bulk_B 0 0
## CD8pos_T 0 0
## Central_memory_CD8pos_T 0 0
## Effector_CD4pos_T 0 0
## Effector_memory_CD8pos_T 0 0
## Follicular_T_Helper 0 1
## Gamma_delta_T 0 0
## Immature_NK 0 0
## Mature_NK 1 0
## Mem_B 1 0
## Memory_NK 0 1
## Memory_Teffs 0 0
## Memory_Tregs 0 0
## Monocytes 1 0
## Myeloid_DCs 0 0
## Naive_B 0 0
## Naive_CD8_T 0 0
## Naive_Teffs 0 0
## Naive_Tregs 1 0
## pDCs 0 0
## Plasmablasts 0 1
## Regulatory_T 0 0
## Th1_precursors 0 0
## Th17_precursors 0 0
## Th2_precursors 0 0
##
## , , batch = 1011.0
##
## condition
## celltype S U
## Bulk_B 0 0
## CD8pos_T 0 0
## Central_memory_CD8pos_T 0 0
## Effector_CD4pos_T 0 0
## Effector_memory_CD8pos_T 0 0
## Follicular_T_Helper 0 0
## Gamma_delta_T 0 0
## Immature_NK 0 0
## Mature_NK 0 0
## Mem_B 0 0
## Memory_NK 0 0
## Memory_Teffs 0 0
## Memory_Tregs 0 0
## Monocytes 0 0
## Myeloid_DCs 0 0
## Naive_B 0 0
## Naive_CD8_T 0 0
## Naive_Teffs 1 0
## Naive_Tregs 0 0
## pDCs 0 0
## Plasmablasts 0 0
## Regulatory_T 0 0
## Th1_precursors 0 0
## Th17_precursors 0 0
## Th2_precursors 0 0
##
## , , batch = 1001.1
##
## condition
## celltype S U
## Bulk_B 0 0
## CD8pos_T 0 0
## Central_memory_CD8pos_T 0 0
## Effector_CD4pos_T 0 0
## Effector_memory_CD8pos_T 0 0
## Follicular_T_Helper 0 0
## Gamma_delta_T 0 0
## Immature_NK 0 0
## Mature_NK 0 0
## Mem_B 0 0
## Memory_NK 0 0
## Memory_Teffs 0 0
## Memory_Tregs 0 0
## Monocytes 0 0
## Myeloid_DCs 0 0
## Naive_B 0 0
## Naive_CD8_T 0 0
## Naive_Teffs 0 0
## Naive_Tregs 0 0
## pDCs 0 0
## Plasmablasts 0 0
## Regulatory_T 1 1
## Th1_precursors 0 0
## Th17_precursors 0 0
## Th2_precursors 0 0
##
## , , batch = 1003.1
##
## condition
## celltype S U
## Bulk_B 1 1
## CD8pos_T 1 1
## Central_memory_CD8pos_T 1 0
## Effector_CD4pos_T 1 1
## Effector_memory_CD8pos_T 1 1
## Follicular_T_Helper 1 1
## Gamma_delta_T 1 1
## Immature_NK 0 0
## Mature_NK 1 0
## Mem_B 1 1
## Memory_NK 0 1
## Memory_Teffs 1 1
## Memory_Tregs 1 1
## Monocytes 1 0
## Myeloid_DCs 0 0
## Naive_B 1 0
## Naive_CD8_T 0 1
## Naive_Teffs 1 1
## Naive_Tregs 0 0
## pDCs 0 0
## Plasmablasts 0 0
## Regulatory_T 1 1
## Th1_precursors 1 0
## Th17_precursors 1 0
## Th2_precursors 1 1
counts <- as.matrix(data) ; rm(data) ; gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 9687186 517.4 24666078 1317.4 NA 21005730 1121.9
## Vcells 169872716 1296.1 307184796 2343.7 102400 307142329 2343.4
dfGCWidth <- data.frame(gc=gcContentPeaks,
width=width(gr))
ggplot(dfGCWidth, aes(x=gc, y=log(width))) +
geom_hex() +
geom_smooth(se=FALSE, color="red", aes(group=1), lwd=1)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Lowess fits
GC content
lowListGC <- list()
for(kk in 1:ncol(counts)){
set.seed(kk)
lowListGC[[kk]] <- lowess(x=gcContentPeaks, y=log1p(counts[,kk]), f=1/10)
}
for(cc in 1:nlevels(celltype)){
curCT <- levels(celltype)[cc]
id <- which(celltype == curCT)
curBatch <- batch[id]
plot(x=seq(min(gcContentPeaks), max(gcContentPeaks), length=10),
y=seq(0, 4, length=10), type='n',
xlab="GC-content", ylab="log(count + 1)", main=curCT)
for(ii in 1:length(id)){
curID <- id[ii]
oo <- order(lowListGC[[curID]]$x)
lines(x=lowListGC[[curID]]$x[oo], y=lowListGC[[curID]]$y[oo], col=colsBig[batch[curID]])
}
}

























for(bb in 1:nlevels(batch)){
curB <- levels(batch)[bb]
id <- which(batch == curB)
plot(x=seq(min(gcContentPeaks), max(gcContentPeaks), length=10),
y=seq(0, 4, length=10), type='n',
xlab="GC-content", ylab="log(count + 1)", main=curB)
for(ii in 1:length(id)){
curID <- id[ii]
oo <- order(lowListGC[[curID]]$x)
lines(x=lowListGC[[curID]]$x[oo], y=lowListGC[[curID]]$y[oo], col=colsBig[batch[curID]])
}
}









Visualization
lowMemNK <- lowListGC[celltype == "Memory_NK"]
dfList <- list()
for(ss in 1:length(lowMemNK)){
oox <- order(lowMemNK[[ss]]$x)
dfList[[ss]] <- data.frame(x=lowMemNK[[ss]]$x[oox], y=lowMemNK[[ss]]$y[oox], sample=ss)
}
dfAll <- do.call(rbind, dfList)
dfAll$sample <- factor(dfAll$sample)
## association of GC content with counts
plotGCHex(gr, rowMeans(counts[, celltype == "Memory_NK"])) +
theme(axis.title = element_text(size=16)) +
labs(fill="Nr. of peaks") +
geom_line(aes(x=x, y=y, group=sample, color=sample), data=dfAll, size=1) +
scale_color_discrete()

## just the average GC content
p1 <- ggplot(dfAll, aes(x=x, y=y, group=sample, color=sample)) +
geom_line(size = 1) +
xlab("GC-content") +
ylab("log(count + 1)") +
theme_classic()
# rm(lowListGC) ; gc()
# across all cell types
set.seed(44)
pList <- c()
id <- sample(nrow(counts), size=1e4)
for(cc in 1:nlevels(celltype)){
curCT <- levels(celltype)[cc]
lowCT <- lowListGC[celltype == curCT]
dfList <- list()
for(ss in 1:length(lowCT)){
oox <- order(lowCT[[ss]]$x[id])
dfList[[ss]] <- data.frame(x=lowCT[[ss]]$x[id][oox], y=lowCT[[ss]]$y[id][oox], sample=ss)
}
dfAll <- do.call(rbind, dfList)
dfAll$sample <- factor(dfAll$sample)
pCT <- ggplot(dfAll, aes(x=x, y=y, group=sample, color=sample)) +
geom_line(size = 1) +
xlab("GC-content") +
ylab("log(count + 1)") +
theme_classic() +
ggtitle(curCT) +
theme(legend.position = "none") +
ylim(c(0, 3.5))
pList[[cc]] <- pCT
}
cowplot::plot_grid(plotlist=pList, nrow=5, ncol=5)
## Warning: Removed 1 row(s) containing missing values (geom_path).

# ggsave("~/Dropbox/research/atacseq/bulk/plots/gcEffectsAllCells.pdf",
# units="in", width=12, height=9)
# ggsave("~/Dropbox/research/atacseq/bulk/plots/gcEffectsAllCells.png",
# units="in", width=12, height=9)
rm(lowListGC, lowCT, pList) ; gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 10512437 561.5 24666078 1317.4 NA 24666078 1317.4
## Vcells 241394067 1841.7 1272423723 9707.9 102400 1590529653 12134.8
Peak width
lowListWidth <- list()
for(kk in 1:ncol(counts)){
lowListWidth[[kk]] <- lowess(x=log(width(gr)), y=log1p(counts[,kk]), f=1/10)
}
plot(x=seq(min(log(width(gr))), max(log(width(gr))), length=10),
y=seq(0, 5, length=10), type='n',
xlab="GC-content", ylab="log(count + 1)")
for(kk in 1:length(lowListWidth)){
oo <- order(lowListWidth[[kk]]$x)
lines(x=lowListWidth[[kk]]$x[oo], y=lowListWidth[[kk]]$y[oo], col=colsBig[kk])
}

# across all cell types
set.seed(44)
pList <- c()
id <- sample(nrow(counts), size=1e4)
for(cc in 1:nlevels(celltype)){
curCT <- levels(celltype)[cc]
lowCT <- lowListWidth[celltype == curCT]
dfList <- list()
for(ss in 1:length(lowCT)){
oox <- order(lowCT[[ss]]$x[id])
dfList[[ss]] <- data.frame(x=lowCT[[ss]]$x[id][oox], y=lowCT[[ss]]$y[id][oox], sample=ss)
}
dfAll <- do.call(rbind, dfList)
dfAll$sample <- factor(dfAll$sample)
pCT <- ggplot(dfAll, aes(x=x, y=y, group=sample, color=sample)) +
geom_line(size = 1) +
xlab("Log peak width") +
ylab("log(count + 1)") +
theme_classic() +
ggtitle(curCT) +
theme(legend.position = "none") +
ylim(c(0, 4.5))
pList[[cc]] <- pCT
}
cowplot::plot_grid(plotlist=pList, nrow=5, ncol=5)
## Warning: Removed 9 row(s) containing missing values (geom_path).
## Warning: Removed 46 row(s) containing missing values (geom_path).
## Warning: Removed 12 row(s) containing missing values (geom_path).
## Warning: Removed 106 row(s) containing missing values (geom_path).
## Warning: Removed 24 row(s) containing missing values (geom_path).
## Warning: Removed 19 row(s) containing missing values (geom_path).
## Warning: Removed 26 row(s) containing missing values (geom_path).
## Warning: Removed 154 row(s) containing missing values (geom_path).

# ggsave("~/Dropbox/research/atacseq/bulk/plots/widthEffectsAllCells.pdf",
# units="in", width=12, height=9)
# ggsave("~/Dropbox/research/atacseq/bulk/plots/widthEffectsAllCells.png",
# units="in", width=12, height=9)
rm(lowListWidth) ; gc()
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 10473477 559.4 24666078 1317.4 NA 24666078 1317.4
## Vcells 802477496 6122.5 1832466160 13980.7 102400 1832389406 13980.1
Mock comparisons
# Memory_NK cells
memContID <- celltype == "Memory_NK" & condition == "U"
countsMemControl <- counts[,memContID]
keepMemContr <- rowSums(cpm(countsMemControl) >= 2) >=3
countsMemControl <- countsMemControl[keepMemContr, ]
# these are all from different batches.
table(droplevels(batch[memContID]))
##
## 1001.0 1002.0 1004.0 1008.0 1010.0 1003.1
## 1 1 1 1 1 1
# equally sized bins after filtering
gcGroupsMem <- Hmisc::cut2(gcContentPeaks[keepMemContr], g=20)
gcGroupsMem10 <- Hmisc::cut2(gcContentPeaks[keepMemContr], g=10)
gcMem <- gcContentPeaks[keepMemContr]
widthGroupsMem <- Hmisc::cut2(width(gr)[keepMemContr], g=20)
set.seed(33)
mock <- factor(sample(rep(letters[1:2], each=3)))
design <- model.matrix(~mock)
No normalization
## TMM normalization
library(edgeR)
d <- DGEList(countsMemControl)
d <- estimateDisp(d, design)
fit <- glmFit(d, design)
lrt <- glmLRT(fit, coef=2)
dfEdgeR <- data.frame(logFC=log(2^lrt$table$logFC),
gc=gcGroupsMem)
pNone <- ggplot(dfEdgeR, aes(x=gc, y=logFC, color=gc)) +
geom_violin() +
geom_boxplot(width=0.1) +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(gcGroups), "continuous")) +
geom_abline(intercept = 0, slope = 0, col="black", lty=2) +
theme_bw() +
ylim(c(-1,1)) +
ggtitle("No normalization") +
xlab("GC-content bin") +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
legend.position = "none",
axis.title = element_text(size=16)) +
geom_smooth(se=FALSE, color="blue", aes(group=1), lwd=1)
pNone
## Warning: Removed 447 rows containing non-finite values (stat_ydensity).
## Warning: Removed 447 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 447 rows containing non-finite values (stat_smooth).

dfEdgeRWidth <- data.frame(logFC=log(2^lrt$table$logFC),
peakWidth=widthGroupsMem)
pNoneWidth <- ggplot(dfEdgeRWidth, aes(x=peakWidth, y=logFC, color=peakWidth)) +
geom_violin() +
geom_boxplot(width=0.1) +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(dfEdgeRWidth$peakWidth), "continuous")) +
geom_abline(intercept = 0, slope = 0, col="black", lty=2) +
theme_bw() +
ylim(c(-1,1)) +
ggtitle("No normalization") +
xlab("Peak width bin") +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
legend.position = "none",
axis.title = element_text(size=16)) +
geom_smooth(se=FALSE, color="blue", aes(group=1), lwd=1)
pNoneWidth
## Warning: Removed 447 rows containing non-finite values (stat_ydensity).
## Warning: Removed 447 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 447 rows containing non-finite values (stat_smooth).

edgeR (TMM normalization)
## TMM normalization
library(edgeR)
d <- DGEList(countsMemControl)
d <- calcNormFactors(d)
d <- estimateDisp(d, design)
fit <- glmFit(d, design)
lrt <- glmLRT(fit, coef=2)
dfEdgeR <- data.frame(logFC=log(2^lrt$table$logFC),
gc=gcGroupsMem)
pedgeR <- ggplot(dfEdgeR, aes(x=gc, y=logFC, color=gc)) +
geom_violin() +
geom_boxplot(width=0.1) +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(gcGroups), "continuous")) +
geom_abline(intercept = 0, slope = 0, col="black", lty=2) +
theme_bw() +
ylim(c(-1,1)) +
ggtitle("TMM normalization") +
xlab("GC-content bin") +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
legend.position = "none",
axis.title = element_text(size=16)) +
geom_smooth(se=FALSE, color="blue", aes(group=1), lwd=1)
pedgeR
## Warning: Removed 185 rows containing non-finite values (stat_ydensity).
## Warning: Removed 185 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 185 rows containing non-finite values (stat_smooth).

dfEdgeRWidth <- data.frame(logFC=log(2^lrt$table$logFC),
peakWidth=widthGroupsMem)
pedgeRWidth <- ggplot(dfEdgeRWidth, aes(x=peakWidth, y=logFC, color=peakWidth)) +
geom_violin() +
geom_boxplot(width=0.1) +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(dfEdgeRWidth$peakWidth), "continuous")) +
geom_abline(intercept = 0, slope = 0, col="black", lty=2) +
theme_bw() +
ylim(c(-1,1)) +
ggtitle("TMM normalization") +
xlab("Peak width bin") +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
legend.position = "none",
axis.title = element_text(size=16)) +
geom_smooth(se=FALSE, color="blue", aes(group=1), lwd=1)
pedgeRWidth
## Warning: Removed 185 rows containing non-finite values (stat_ydensity).
## Warning: Removed 185 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 185 rows containing non-finite values (stat_smooth).

DESeq2 (MOR normalization)
## DESeq2 normalization
library(DESeq2)
dds <- DESeqDataSetFromMatrix(countsMemControl,
colData=data.frame(mock=mock),
design=~mock)
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
res <- results(dds, name="mock_b_vs_a")
dfDESeq2 <- data.frame(logFC=log(2^res$log2FoldChange),
gc=gcGroupsMem)
pdeseq <- ggplot(dfDESeq2) +
aes(x=gc, y=logFC, color=gc) +
geom_violin() +
geom_boxplot(width=0.1) +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(gcGroups), "continuous")) +
geom_abline(intercept = 0, slope = 0, col="black", lty=2) +
theme_bw() +
ylim(c(-1,1)) +
ggtitle("DESeq2 MOR normalization") +
xlab("GC-content bin") +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
legend.position = "none",
axis.title = element_text(size=16)) +
geom_smooth(se=FALSE, color="blue", aes(group=1), lwd=1)
pdeseq
## Warning: Removed 249 rows containing non-finite values (stat_ydensity).
## Warning: Removed 249 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 249 rows containing non-finite values (stat_smooth).

dfDESeq2Width <- data.frame(logFC=log(2^res$log2FoldChange),
peakWidth=widthGroupsMem)
pdeseqWidth <- ggplot(dfDESeq2Width, aes(x=peakWidth, y=logFC, color=peakWidth)) +
geom_violin() +
geom_boxplot(width=0.1) +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(dfDESeq2Width$peakWidth), "continuous")) +
geom_abline(intercept = 0, slope = 0, col="black", lty=2) +
theme_bw() +
ylim(c(-1,1)) +
ggtitle("DESeq2 MOR normalization") +
xlab("Peak width bin") +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
legend.position = "none",
axis.title = element_text(size=16)) +
geom_smooth(se=FALSE, color="blue", aes(group=1), lwd=1)
pdeseqWidth
## Warning: Removed 249 rows containing non-finite values (stat_ydensity).
## Warning: Removed 249 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 249 rows containing non-finite values (stat_smooth).

Full quantile
## Full quantile normalization
countsFQ <- FQnorm(countsMemControl, type="median")
d <- DGEList(countsFQ)
d <- estimateDisp(d, design)
fit <- glmFit(d, design)
lrtFQ <- glmLRT(fit, coef=2)
dfFQ <- data.frame(logFC=log(2^lrtFQ$table$logFC),
gc=gcGroupsMem)
pFQ <- ggplot(dfFQ) +
aes(x=gc, y=logFC, color=gc) +
geom_violin() +
geom_boxplot(width=0.1) +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(gcGroups), "continuous")) +
geom_abline(intercept = 0, slope = 0, col="black", lty=2) +
theme_bw() +
ylim(c(-1,1)) +
ggtitle("FQ normalization") +
xlab("GC-content bin") +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
legend.position = "none",
axis.title = element_text(size=16)) +
geom_smooth(se=FALSE, color="blue", aes(group=1), lwd=1)
pFQ
## Warning: Removed 1001 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1001 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1001 rows containing non-finite values (stat_smooth).

dfFQWidth <- data.frame(logFC=log(2^lrtFQ$table$logFC),
peakWidth=widthGroupsMem)
pFQWidth <- ggplot(dfFQWidth, aes(x=peakWidth, y=logFC, color=peakWidth)) +
geom_violin() +
geom_boxplot(width=0.1) +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(dfFQWidth$peakWidth), "continuous")) +
geom_abline(intercept = 0, slope = 0, col="black", lty=2) +
theme_bw() +
ylim(c(-1,1)) +
ggtitle("FQ normalization") +
xlab("Peak width bin") +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
legend.position = "none",
axis.title = element_text(size=16)) +
geom_smooth(se=FALSE, color="blue", aes(group=1), lwd=1)
pFQWidth
## Warning: Removed 1001 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1001 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1001 rows containing non-finite values (stat_smooth).

Composite plot for figure 1
p <- plot_grid(p1 + ggtitle("Memory NK cells"), pedgeR,
pdeseq, pFQ,
labels=letters[1:4])
## Warning: Removed 185 rows containing non-finite values (stat_ydensity).
## Warning: Removed 185 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 185 rows containing non-finite values (stat_smooth).
## Warning: Removed 249 rows containing non-finite values (stat_ydensity).
## Warning: Removed 249 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 249 rows containing non-finite values (stat_smooth).
## Warning: Removed 1001 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1001 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1001 rows containing non-finite values (stat_smooth).
p

# ggsave("~/Dropbox/research/atacseq/bulk/plots/figure1.png",
# units="in", width=12, height=9)
pWidth <- plot_grid(pedgeRWidth,
pdeseqWidth, pFQWidth,
labels=letters[1:3],
ncol=1)
## Warning: Removed 185 rows containing non-finite values (stat_ydensity).
## Warning: Removed 185 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 185 rows containing non-finite values (stat_smooth).
## Warning: Removed 249 rows containing non-finite values (stat_ydensity).
## Warning: Removed 249 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 249 rows containing non-finite values (stat_smooth).
## Warning: Removed 1001 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1001 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1001 rows containing non-finite values (stat_smooth).
pWidth

# ggsave("~/Dropbox/research/atacseq/bulk/plots/figure1Width.png",
# units="in", width=9, height=10)
Figure 2
##### FIGURE 2
## cqn
cqnModel <- cqn(countsMemControl, x=gcMem, sizeFactors = colSums(countsMemControl),
lengths=width(gr)[keepMemContr])
## Warning: The use of 'sig2' is deprecated; do specify 'sigma' (= sqrt(sig2))
## instead
d <- DGEList(countsMemControl)
d$offset <- cqnModel$glm.offset
d <- estimateDisp(d, design)
fit <- glmFit(d, design)
lrtCqn <- glmLRT(fit, coef=2)
dfCqn <- data.frame(logFC=log(2^lrtCqn$table$logFC),
gc=gcGroupsMem)
pCqn <- ggplot(dfCqn) +
aes(x=gc, y=logFC, color=gc) +
geom_violin() +
geom_boxplot(width=0.1) +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(gcGroups), "continuous")) +
geom_abline(intercept = 0, slope = 0, col="black", lty=2) +
theme_bw() +
ylim(c(-1,1)) +
xlab("GC-content bin") +
ggtitle("cqn normalization") +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
legend.position = "none",
axis.title = element_text(size=16)) +
geom_smooth(se=FALSE, color="blue", aes(group=1), lwd=1)
pCqn
## Warning: Removed 302 rows containing non-finite values (stat_ydensity).
## Warning: Removed 302 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 302 rows containing non-finite values (stat_smooth).

dfCqnWidth <- data.frame(logFC=log(2^lrtCqn$table$logFC),
peakWidth=widthGroupsMem)
pCqnWidth <- ggplot(dfCqnWidth, aes(x=peakWidth, y=logFC, color=peakWidth)) +
geom_violin() +
geom_boxplot(width=0.1) +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(dfCqnWidth$peakWidth), "continuous")) +
geom_abline(intercept = 0, slope = 0, col="black", lty=2) +
theme_bw() +
ylim(c(-1,1)) +
xlab("Peak width bin") +
ggtitle("cqn normalization") +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
legend.position = "none",
axis.title = element_text(size=16)) +
geom_smooth(se=FALSE, color="blue", aes(group=1), lwd=1)
pCqnWidth
## Warning: Removed 302 rows containing non-finite values (stat_ydensity).
## Warning: Removed 302 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 302 rows containing non-finite values (stat_smooth).

# ## EDASeq
library(EDASeq)
## Loading required package: ShortRead
##
## Attaching package: 'ShortRead'
## The following object is masked from 'package:dplyr':
##
## id
## The following object is masked from 'package:purrr':
##
## compose
## The following object is masked from 'package:tibble':
##
## view
#emptyRows <- which(rownames(countsMouse) == "")
#rownames(countsMouse)[emptyRows] <- paste0("emptyPeak",1:length(emptyRows))
dataWithin <- withinLaneNormalization(countsMemControl, y=gcMem,
num.bins=20, which="full")
dataNorm <- betweenLaneNormalization(dataWithin, which="full")
d <- DGEList(dataNorm)
d <- estimateDisp(d, design)
## Warning: Zero sample variances detected, have been offset away from zero
fit <- glmFit(d, design)
lrtEDASeq <- glmLRT(fit, coef=2)
dfEDASeq <- data.frame(logFC=log(2^lrtEDASeq$table$logFC),
gc=gcGroupsMem)
pEDASeq <- ggplot(dfEDASeq) +
aes(x=gc, y=logFC, color=gc) +
geom_violin() +
geom_boxplot(width=0.1) +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(gcGroups), "continuous")) +
geom_abline(intercept = 0, slope = 0, col="black", lty=2) +
theme_bw() +
ylim(c(-1,1)) +
xlab("GC-content bin") +
ggtitle("FQ-FQ normalization") +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
legend.position = "none",
axis.title = element_text(size=16)) +
geom_smooth(se=FALSE, color="blue", aes(group=1), lwd=1)
pEDASeq
## Warning: Removed 1059 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1059 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1059 rows containing non-finite values (stat_smooth).

dfEDASeqWidth <- data.frame(logFC=log(2^lrtEDASeq$table$logFC),
peakWidth=widthGroupsMem)
pEDASeqWidth <- ggplot(dfEDASeqWidth, aes(x=peakWidth, y=logFC, color=peakWidth)) +
geom_violin() +
geom_boxplot(width=0.1) +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(dfCqnWidth$peakWidth), "continuous")) +
geom_abline(intercept = 0, slope = 0, col="black", lty=2) +
theme_bw() +
ylim(c(-1,1)) +
xlab("Peak width bin") +
ggtitle("FQ-FQ normalization") +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
legend.position = "none",
axis.title = element_text(size=16)) +
geom_smooth(se=FALSE, color="blue", aes(group=1), lwd=1)
pEDASeqWidth
## Warning: Removed 1059 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1059 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1059 rows containing non-finite values (stat_smooth).

## GC-QN
countsGCQN <- gcqn(countsMemControl, gcGroupsMem, summary = "median")
d <- DGEList(countsGCQN)
d <- estimateDisp(d, design)
fit <- glmFit(d, design)
lrtGCQN <- glmLRT(fit, coef=2)
dfGCQN <- data.frame(logFC=log(2^lrtGCQN$table$logFC),
gc=gcGroupsMem)
pGCQN <- ggplot(dfGCQN) +
aes(x=gc, y=logFC, color=gc) +
geom_violin() +
geom_boxplot(width=0.1) +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(gcGroups), "continuous")) +
geom_abline(intercept = 0, slope = 0, col="black", lty=2) +
theme_bw() +
ylim(c(-1,1)) +
xlab("GC-content bin") +
ggtitle("GC-FQ normalization") +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
legend.position = "none",
axis.title = element_text(size=16)) +
geom_smooth(se=FALSE, color="blue", aes(group=1), lwd=1)
pGCQN
## Warning: Removed 905 rows containing non-finite values (stat_ydensity).
## Warning: Removed 905 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 905 rows containing non-finite values (stat_smooth).

dfGCQNWidth <- data.frame(logFC=log(2^lrtGCQN$table$logFC),
peakWidth=widthGroupsMem)
pGCQNWidth <- ggplot(dfGCQNWidth, aes(x=peakWidth, y=logFC, color=peakWidth)) +
geom_violin() +
geom_boxplot(width=0.1) +
scale_color_manual(values=wesanderson::wes_palette("Zissou1", nlevels(dfCqnWidth$peakWidth), "continuous")) +
geom_abline(intercept = 0, slope = 0, col="black", lty=2) +
theme_bw() +
ylim(c(-1,1)) +
xlab("Peak width bin") +
ggtitle("GC-FQ normalization") +
theme(axis.text.x = element_text(angle = 45, vjust = .5),
legend.position = "none",
axis.title = element_text(size=16)) +
geom_smooth(se=FALSE, color="blue", aes(group=1), lwd=1)
pGCQNWidth
## Warning: Removed 905 rows containing non-finite values (stat_ydensity).
## Warning: Removed 905 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 905 rows containing non-finite values (stat_smooth).

## ridges plot before normalization
countsN <- countsMemControl[,order(colSums(countsMemControl), decreasing=TRUE)[1:3]]
lc <- log1p(c(countsN))
joyDat <- data.frame(lc=lc,
gc=rep(gcGroupsMem10, 3),
sample=rep(1:3, each=nrow(countsN)))
axText <- 0
pRidge1 <- joyDat %>% ggplot(aes(y=gc)) +
geom_density_ridges(aes(x=lc)) +
facet_wrap(.~sample) +
theme_ridges(grid=FALSE, font_size=5, center_axis_labels = TRUE) +
xlim(c(0.5,7)) +
xlab("log(count + 1)") +
ylab("GC-content bin") +
theme(axis.text.y = element_text(size=axText),
axis.text.x = element_text(size=10),
legend.position = "none",
axis.title = element_text(size=16),
strip.background = element_blank(),
strip.text.x = element_blank())
pFC <- cowplot::plot_grid(pCqn, pEDASeq, pGCQN,
labels=letters[2:4],
nrow=3, ncol=1)
## Warning: Removed 302 rows containing non-finite values (stat_ydensity).
## Warning: Removed 302 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 302 rows containing non-finite values (stat_smooth).
## Warning: Removed 1059 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1059 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1059 rows containing non-finite values (stat_smooth).
## Warning: Removed 905 rows containing non-finite values (stat_ydensity).
## Warning: Removed 905 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 905 rows containing non-finite values (stat_smooth).
pFig2 <- cowplot::plot_grid(pRidge1,
pFC,
labels=c("a",""))
## Picking joint bandwidth of 0.0688
## Picking joint bandwidth of 0.143
## Picking joint bandwidth of 0.164
## Warning: Removed 3414 rows containing non-finite values (stat_density_ridges).
pFig2

# ggsave("~/Dropbox/research/atacseq/bulk/plots/figure2.png",
# units="in", width=12, height=9)
pWidth2 <- cowplot::plot_grid(pCqnWidth, pEDASeqWidth, pGCQNWidth,
labels=letters[1:3],
nrow=3, ncol=1)
## Warning: Removed 302 rows containing non-finite values (stat_ydensity).
## Warning: Removed 302 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 302 rows containing non-finite values (stat_smooth).
## Warning: Removed 1059 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1059 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 1059 rows containing non-finite values (stat_smooth).
## Warning: Removed 905 rows containing non-finite values (stat_ydensity).
## Warning: Removed 905 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 905 rows containing non-finite values (stat_smooth).
pWidth2

ggsave("~/Dropbox/research/atacseq/bulk/plots/figure2Width.png",
units="in", width=9, height=10)